/*******************************************************************************
Last update: June 29, 2023 
********************************************************************************/

// Start from a clean console and an empty session.
cls
clear all

// Root folder of the replication package; all relative paths below resolve against it.
*local dir "~/Dropbox/Research/Distinction Effect"
local dir "D:\Dropbox\Distinction Effect\Replication Package"
cd "`dir'"

// Run the cleaning-programs do-file without echoing its output.
quietly do "Does/Data Processing/0. Cleaning Programs.do"

/*******************************************************************************
			1. Merge Data: OLE/PILA (Labor Market) + SPADIES + Saber11
********************************************************************************/
// Master file: one row per Saber Pro record. After every merge only
// master-only (1) and matched (3) rows are kept, so no using-only rows are added.
use "Data/Intermediates/SaberPro_2006-2010.dta", clear

// SPADIES
merge m:1 national* using "Data/Intermediates/SaberPro_Spadies.dta", generate(spa_merge)
keep if inlist(spa_merge, 1, 3)

// Saber 11
merge m:1 national* using "Data/Intermediates/SaberProSaber11_Complete", generate(sb11_merge)
keep if inlist(sb11_merge, 1, 3)

// Recode one-letter ID types to the two-letter codes before merging OLE and PILA.
replace national_id_type = "CC" if national_id_type == "C"
replace national_id_type = "CE" if national_id_type == "E"

// OLE (labor market)
merge m:1 national* using "Data/Intermediates/OLE_2007_2016.dta", generate(ole_merge)
keep if inlist(ole_merge, 1, 3)
rename gender ole_gender

// PILA: only the pila* variables are brought in.
merge m:1 national* using "Data/Intermediates/Pila_2009_2016.dta", generate(pila_merge) keepusing(pila*)
keep if inlist(pila_merge, 1, 3)

save "Data/Finals/Complete Sample.dta", replace

/*******************************************************************************
			2. Prepare Estimation Sample
********************************************************************************/
use "Data/Finals/Complete Sample.dta", clear

/*******************************************************************************
Covariates
********************************************************************************/

//Awardees Indicator
rename spro_specific_award award

//Exam Dates
// One calendar date per exam_time code, written as mdy(month, day, year).
// NOTE(review): code 20102 maps to 6 June 2010 and 20103 to 12 September 2010,
// unlike earlier years where suffix 1 = June and 2 = November -- confirm the coding.
gen exam_date = .
replace exam_date = mdy(6, 4, 2006)   if exam_time == 20061
replace exam_date = mdy(6, 3, 2007)   if exam_time == 20071
replace exam_date = mdy(6, 8, 2008)   if exam_time == 20081
replace exam_date = mdy(11, 26, 2006) if exam_time == 20062
replace exam_date = mdy(11, 25, 2007) if exam_time == 20072
replace exam_date = mdy(11, 23, 2008) if exam_time == 20082
replace exam_date = mdy(11, 29, 2009) if exam_time == 20092
replace exam_date = mdy(6, 6, 2010)   if exam_time == 20102
replace exam_date = mdy(9, 12, 2010)  if exam_time == 20103
format exam_date %td

//Graduation Term
// grad_time takes fst_grad and falls back to spa_gradtime when fst_grad is missing.
gen grad_time = fst_grad
replace grad_time = spa_gradtime if grad_time == .

// Split grad_time into year (first four characters) and semester (last character).
tostring grad_time, gen(aux)
gen grad_year = substr(aux, 1, 4)
gen grad_sem = substr(aux, -1, 1)
destring grad_year grad_sem, replace
drop aux

// Graduation date: use the recorded date when it falls after the exam date.
gen grad_date = fst_graddate if fst_graddate > exam_date & fst_graddate != .

// Otherwise approximate it by the end of the graduation semester
// (30 June for semester 1, 30 December for semester 2).
// The date is built with mdy() from the numeric year. The previous version
// concatenated "30/06" and the year with no separator ("30/062008") and relied
// on date() parsing that string, after converting grad_year to string and back.
gen aux = mdy(6, 30, grad_year) if grad_sem == 1
replace aux = mdy(12, 30, grad_year) if grad_sem == 2

// exam_year is not created in this file -- presumably it comes with the data; verify.
replace grad_date = aux if grad_date == . & grad_year >= exam_year & grad_year != .
format grad_date %td
drop aux

//Birthday
// Start from the Saber Pro birthdate and blank out birth years outside 1940-1992.
gen birthdate = spro_birthdate
replace birthdate = . if year(birthdate) < 1940 | year(birthdate) > 1992

// Fill remaining gaps from the other sources, in this priority order.
foreach source in ole_birthdate sb11_birthdate spa_birthdate {
	replace birthdate = `source' if birthdate == .
}

//Age at Exam
// Age in whole years at the exam date; ages outside 18-62 are set to missing.
gen age = (exam_date - birthdate)/365.25
replace age = round(age)
replace age = . if !inrange(age, 18, 62)
gen age2 = age^2

// Drop the birthdate as well whenever the resulting age is missing.
replace birthdate = . if age == .

//Gender
// female = 1/0, taken from the first source that has a usable value:
// Saber Pro, then OLE, then Saber 11, then SPADIES.
cap drop female

// Saber Pro: 1 -> female, 0 -> male.
gen female = spro_gender if inlist(spro_gender, 0, 1)

// OLE: string codes "F" / "M".
replace female = (ole_gender == "F") if female == . & inlist(ole_gender, "F", "M")

// Saber 11: the mapping is reversed here (0 -> female, 1 -> male).
// NOTE(review): confirm this coding against the Saber 11 dictionary.
replace female = 1 - sb11_gender if female == . & inlist(sb11_gender, 0, 1)

// SPADIES: 1 -> female, 0 -> male.
replace female = spa_gender if female == . & inlist(spa_gender, 0, 1)

//Parents' Education (Dictionary 2012-2013. Compare with 2006-2009)
// Builds the same set of education dummies for the mother (meduc_*) and the
// father (feduc_*) from spro_educmother / spro_educfather.
// *_none also equals 1 when the raw code is missing; every other dummy is
// missing when the raw code is missing.
foreach parent in mother father {
	local p = substr("`parent'", 1, 1)
	local raw spro_educ`parent'

	cap drop `p'educ_*
	gen `p'educ_none = inlist(`raw', 0, 1, 2, 99) | `raw' == .
	gen `p'educ_elmnt = inlist(`raw', 3, 9, 10) if `raw' != .
	gen `p'educ_hs = inlist(`raw', 4, 5, 11, 12) if `raw' != .
	gen `p'educ_cllg = (inlist(`raw', 6, 7) | (`raw' >= 13 & `raw' <= 16)) if `raw' != .
	gen `p'educ_grdsch = inlist(`raw', 8, 17) if `raw' != .
	gen `p'educ_cllg2Y = inlist(`raw', 6, 13, 14) if `raw' != .
	gen `p'educ_cllg4Y = inlist(`raw', 7, 15, 16) if `raw' != .
}

//Did any of the parents attend college?
// 1 when the mother OR the father has college or graduate education.
// The previous condition was
//   (meduc_cllg | meduc_grdsch) & (meduc_cllg | feduc_grdsch)
// which tested meduc_cllg twice and never looked at feduc_cllg, so a father with
// college education alone never set the indicator.
// 0 when neither does and at least one parent's education code is observed.
cap drop parents_cllg
gen parents_cllg = 1 if meduc_cllg == 1 | meduc_grdsch == 1 | feduc_cllg == 1 | feduc_grdsch == 1
replace parents_cllg = 0 if parents_cllg == . & (spro_educmother != . | spro_educfather != .)

//Parents' Occupation
// Occupation-group dummies for the father (foccp_*) and then the mother (moccp_*),
// built from the raw codes in spro_occupfather / spro_occupmother.
// *_other also equals 1 when the raw code is missing.
foreach parent in father mother {
	local p = substr("`parent'", 1, 1)
	local occ spro_occup`parent'

	gen `p'occp_other = inlist(`occ', 7, 11, 12, 24, 25, 26, 99) | `occ' == .
	gen `p'occp_mngr = inlist(`occ', 1, 2, 13, 14, 15, 16)
	gen `p'occp_whitecllr = inlist(`occ', 4, 6, 17, 18)
	gen `p'occp_bluecllr = inlist(`occ', 8, 19)
	gen `p'occp_home = inlist(`occ', 9, 10, 22, 23)
	gen `p'occp_selfemp = inlist(`occ', 3, 5, 20, 21)
}

//either way: 5 6 14 21 26
local other 7 9 11 12 23 24 25 99 26
local blue_collar 8 10 19 22
local white_collar 1 2 3 4 5 6 13 14 15 16 17 18 20 21 

// Comma-separated copies of the code lists, for use inside inlist().
local other_csv : subinstr local other " " ", ", all
local blue_csv : subinstr local blue_collar " " ", ", all

// Blue-collar indicator per parent: 1 for blue-collar codes, 0 for any other
// observed code, and missing for the "other" codes or a missing occupation.
foreach parent in father mother {
	cap drop `parent'_bcollar
	gen `parent'_bcollar = inlist(spro_occup`parent', `blue_csv') if spro_occup`parent' != .
	replace `parent'_bcollar = . if inlist(spro_occup`parent', `other_csv')
}

//Does any of the parents work as a blue-collar?
// Defined whenever at least one parent's blue-collar indicator is non-missing.
cap drop parents_bcollar
gen parents_bcollar = (mother_bcollar == 1 | father_bcollar == 1) if father_bcollar != . | mother_bcollar != .

//Living in Principal City?
// main_city = 1 when spro_municipio contains any of the names below
// ("BOGOT" is a prefix, so it matches any spelling that starts that way).
global Cities BOGOT MEDELLIN CALI BARRANQUILLA BUCARAMANGA MANIZALES PEREIRA CUCUTA PASTO IBAGUE MONTERIA CARTAGENA VILLAVICENCIO  
gen main_city = 0
foreach muni in $Cities {
	replace main_city = 1 if regexm(spro_municipio, "`muni'")
}

//Socioeconomic Stratum (College)
// Saber Pro stratum first, SPADIES stratum as fallback; code 8 is discarded in both.
gen stratum = spro_stratum if spro_stratum != 8
replace stratum = spa_stratum if stratum == . & spa_stratum != . & spa_stratum != 8

// Both indicators are left missing when stratum is missing. Stata treats missing
// as larger than any number, so an unconditional "stratum >= 4" would flag every
// student with unknown stratum as high SES.
gen low_SES = stratum <= 2 if !missing(stratum)
gen high_SES = stratum >= 4 if !missing(stratum)

//Socioeconomic Stratum (High School)
// NOTE(review): spa_stratum is used here without excluding code 8, unlike above -- confirm.
gen highSES = spa_stratum
//Impute stratum using college information
replace highSES = stratum if highSES == . 
replace highSES = highSES >= 4 if highSES != .
//Flag for the source of highSES: equals 1 when spa_stratum is observed, i.e. when
//highSES was NOT imputed from the college stratum.
//NOTE(review): the earlier comment described this as "dummy if we imputed stratum",
//which is the opposite of what the expression computes -- confirm the intended polarity.
gen d_highSES = spa_stratum != .

//Does the student work?
// 1 for spro_work codes 3, 4, 5; 0 for code 0; missing otherwise.
gen working = 1 if spro_work==3 | spro_work==4 | spro_work==5
replace working = 0 if spro_work==0

//Semester in College Program
// Code 99 is treated as missing; values of 11 or more are top-coded at 11.
cap drop num_semesters
gen num_semesters = spro_semester if spro_semester != 99
replace num_semesters = 11 if num_semesters >= 11 & num_semesters != .

// Semester groups (<=6, 7-8, 9-10, 11+) expanded into d_semester* dummies.
gen aux = 1 if num_semesters <= 6 & num_semesters != .
replace aux = 2 if inlist(num_semesters, 7, 8)
replace aux = 3 if inlist(num_semesters, 9, 10)
replace aux = 4 if inlist(num_semesters, 11, 12)
tabulate aux, generate(d_semester)
drop aux

//Single?
gen single = (spro_maritalstatus == 1) if spro_maritalstatus != .

//Married/Cohabiting?
gen married = inlist(spro_maritalstatus, 2, 5) if spro_maritalstatus != .

//Private College
// Replace the record-level flag by its mean within each college code, then
// force the flag to 1 for the listed college codes.
rename cllg_prv aux1
bysort spro_cllgcode : egen cllg_prv = mean(aux1)
tabulate cllg_prv
replace cllg_prv = 1 if inlist(spro_cllgcode, 2726, 2729, 2801, 2806, 2809, 2819, 2839)
drop aux1

//Standardized Reading and English Scores
// Each score is standardized (mean 0, sd 1) within its exam_time.
gen reading = .
gen english = .
levelsof exam_time, local(T)
quietly foreach subject in reading english {
	foreach t of local T {
		summarize spro_`subject' if exam_time == `t'
		replace `subject' = (spro_`subject' - r(mean))/r(sd) if exam_time == `t'
	}
}

//Top 20 Universities
//COLOMBIA: First national university rankings unveiled
//https://www.universityworldnews.com/post.php?story=20110204224146865
//The ranking indicators do not take Saber Pro into account
//Indicators used: publications in indexed journals, number of master's and doctoral programs, research groups recognized by Colciencias
cap drop univ*
cap rename cllg_code spro_cllgcode

// College codes in ranking order; position k in the list creates univ_k.
//  1 1101 UNal                                  11 1112 Universidad de Caldas
//  2 1201 Universidad de Antioquia              12 1710 Universidad Pontificia Bolivariana
//  3 1203 Universidad del Valle                 13 1106 Universidad Tecnologica y Pedagogica*
//  4 1813 Universidad de Los Andes              14 1205 Universidad de Cartagena*
//  5 1701 Universidad Javeriana                 15 1706 Universidad Externado de Colombia
//  6 1102 UNal*                                 16 1714 Universidad del Rosario
//  7 1204 Universidad Industrial de Santander   17 1301 Universidad Distrital
//  8 1713 Universidad del Norte                 18 1704 Universidad Santo Tomas*
//  9 1110 Universidad del Cauca                 19 1712 Universidad EAFIT
// 10 2711 Universidad Tecnologica de Pereira*   20 1711 Universidad de La Sabana
local ranked_codes 1101 1201 1203 1813 1701 1102 1204 1713 1110 2711 1112 1710 1106 1205 1706 1714 1301 1704 1712 1711
local position = 0
foreach code of local ranked_codes {
	local ++position
	gen univ_`position' = (spro_cllgcode == `code')
}

// top5: positions 1-5; top20: positions 6-20; above20: everything else.
cap drop top5 top20 above20
gen top5 = 0
forvalues k = 1/5 {
	replace top5 = 1 if univ_`k' == 1
}
gen top20 = 0
forvalues k = 6/20 {
	replace top20 = 1 if univ_`k' == 1
}
gen above20 = (top5==0 & top20==0)

//College Reputation
rename spro_cllgcode cllg_code
cap drop cllg_reputation
merge m:1 cllg_code using "Data/Finals/CollegeReputation.dta"
keep if inlist(_merge, 1, 3)
drop _merge

// College-level mean of the students' Saber 11 score (the larger of the two
// available measures per student), compared against the reputation measure.
tempvar student_sb11 college_sb11
egen `student_sb11' = rowmax(sb11_overall_pctile spa_sb11score)
bysort cllg_code : egen `college_sb11' = mean(`student_sb11')
correlate `college_sb11' cllg_reputation // rho = 0.9613

// Flag colleges with no reputation measure.
cap drop d_cllg_reputation
gen d_cllg_reputation = cllg_reputation == .

// Reputation, with the college mean score filling in where it is missing.
cap drop cllg_reputation_sd
gen cllg_reputation_sd = cllg_reputation
replace cllg_reputation_sd = `college_sb11' if cllg_reputation_sd == .

//College-Program Network
gen programcode = spro_sniescode
merge m:1 programcode using "Data/Finals/ProgramNetwork.dta"
keep if inlist(_merge, 1, 3)
drop _merge

//Area of Study (Using Field-Specific Test)
// area is assigned by keyword search in exam_name. The groups are applied in the
// order below and a later match overwrites an earlier one (for example an exam
// containing "MEDICINA VETERINARIA" first matches "MEDICINA" and ends up in area 1).
gen area = .

// 8: Engineering
foreach keyword in "INGENIERIA" "ELECTRONICA" "SISTEMAS" {
	replace area = 8 if strpos(exam_name, "`keyword'") > 0
}
// 6: Business and Economics
foreach keyword in "ADMINISTRACION" "CONTADURIA" "ECONOMIA" {
	replace area = 6 if strpos(exam_name, "`keyword'") > 0
}
// 5: Social Sciences
foreach keyword in "COMUNICACION E INFORMACION" "DERECHO" "PSICOLOGIA" "TRABAJO SOCIAL" "LICENCIATURA" {
	replace area = 5 if strpos(exam_name, "`keyword'") > 0
}
// 4: Health
foreach keyword in "BACTERIOLOGIA" "ENFERMERIA" "FISIOTERAPIA" "FONOAUDIOLOGIA" "MEDICINA" "NUTRICION Y DIETETICA" "ODONTOLOGIA" "OPTOMETRIA" "TERAPIA OCUPACIONAL" "INSTRUMENTACION QUIRURGICA" {
	replace area = 4 if strpos(exam_name, "`keyword'") > 0
}
// 9: Math and Sciences
foreach keyword in "QUIMICA" "BIOLOGIA" "FISICA" "GEOLOGIA" "MATEMATICA" {
	replace area = 9 if strpos(exam_name, "`keyword'") > 0
}
// 1: Agricultural Sciences
foreach keyword in "MEDICINA VETERINARIA" "ZOOTECNIA" "AGRONOMICA" "AGRICOLA" "AGROINDUSTRIAL" {
	replace area = 1 if strpos(exam_name, "`keyword'") > 0
}

label define lab_area 8 "Engineering" 9 "Math and Sciences" 6 "Business and Economics" 5 "Social Sciences" 1 "Agricultural Sciences" 4 "Health"
label values area lab_area

tabulate area, generate(area)

//Field
rename program_field field_name
merge m:1 field_name using "Data/Originals/SNIES/Nucleo Basico de Conocimiento", keepusing(field_code)
keep if inlist(_merge, 1, 3)
drop _merge

//Saber 11 Score
// Larger of the two available Saber 11 measures, plus a flag for missing scores.
egen sb11score = rowmax(sb11_overall_pctile spa_sb11score)
gen d_sb11score = (sb11score==.)

// Stratum dummies; the sixth category is folded into the fifth.
tabulate stratum, generate(fincome_)
replace fincome_5 = 1 if fincome_6 == 1

//Impute Saber 11
//global T reading english
global X female age age2 fincome_2 fincome_3 fincome_4 fincome_5 num_semesters
global M meduc_hs meduc_cllg meduc_grdsch moccp_mngr moccp_whitecllr moccp_bluecllr moccp_selfemp
global F feduc_hs feduc_cllg feduc_grdsch foccp_mngr foccp_whitecllr foccp_bluecllr foccp_selfemp
global FE spro_sniescode spro_munireside exam_time

// NOTE(review): global T is not set in this file (its definition above is commented
// out), so $T expands to whatever the session already holds -- confirm it is empty.
global Covs $X $M $F $T

reghdfe sb11score $Covs, absorb($FE, savefe) keepsingletons

//Predict
// Spread each saved fixed effect over its whole group (so rows outside the
// estimation sample get one too), then rebuild the linear prediction by hand.
local k = 1
foreach fe_var of global FE {
	bysort `fe_var' : egen FE_`fe_var' = mean(__hdfe`k'__)
	local ++k
}
egen FE = rowtotal(FE_*)
gen xb = 0
foreach covariate of global Covs {
	replace xb = xb + _b[`covariate']*`covariate'
}
gen y_score = _b[_cons] + xb + FE
drop xb FE* _*

//Round
// Predicted score rounded to an integer and clamped to [1, 100]; observed scores
// take precedence. The observed-only score is kept as saber11.
replace y_score = min(max(round(y_score), 1), 100) if y_score != .
replace y_score = sb11score if sb11score != .
rename sb11score saber11
rename y_score sb11score

// Remaining gaps: college-by-exam-time mean, rounded and clamped the same way.
bysort cllg_code exam_time: egen aux = mean(sb11score)
replace aux = min(max(round(aux), 1), 100) if aux != .
replace sb11score = aux if sb11score == .
drop aux

summarize sb11score if award == 1

// Standardize within exam_time.
levelsof exam_time, local(Time)
foreach t of local Time {
	summarize sb11score if exam_time == `t'
	replace sb11score = (sb11score - r(mean))/r(sd) if exam_time == `t'
}

/*******************************************************************************
Running Variable
********************************************************************************/

//Specific Exam x Exam Time
egen fieldtest_time = group(exam_id exam_time)

//Running 1: Overall Scores
// Cutoff = midpoint between the lowest awarded score and the highest non-awarded
// score within each exam-by-time cell.
tempvar min_awarded upper max_nonawarded lower
egen double `min_awarded' = min(spro_puntaje) if award == 1, by(fieldtest_time)
egen double `upper' = min(`min_awarded'), by(fieldtest_time)
egen double `max_nonawarded' = max(spro_puntaje) if award == 0, by(fieldtest_time)
egen double `lower' = min(`max_nonawarded'), by(fieldtest_time)
cap drop score_cutoff
gen double score_cutoff = (`upper' + `lower')/2
drop `min_awarded' `upper' `max_nonawarded' `lower'

// Distance to the cutoff in within-cell standard deviations.
cap drop sd_score
bysort fieldtest_time : egen double sd_score = sd(spro_puntaje)

cap drop score_sd
gen double score_sd = (spro_puntaje - score_cutoff)/sd_score

//Check
count if score_sd >= 0 & award == 0
count if score_sd < 0 & award == 1

//Running 2: Ranking
// Field ranks (highest score = 1) within cell, then renumbered cell by cell into
// consecutive integers so that tied scores share a rank and no rank is skipped.
tempvar field_rank dense
bysort fieldtest_time : egen `field_rank' = rank(spro_puntaje) if spro_puntaje != ., field
gen rank = .
levelsof fieldtest_time, local(Program_Year)
quietly foreach cell of local Program_Year {
	egen `dense' = group(`field_rank') if fieldtest_time == `cell'
	replace rank = `dense' if fieldtest_time == `cell'
	drop `dense'
}
sort fieldtest_time spro_puntaje
drop `field_rank'

// Rank cutoff = worst (largest) rank among awardees in the cell.
tempvar worst_awarded
egen double `worst_awarded' = max(rank) if award == 1, by(fieldtest_time)
egen double rank_cutoff = min(`worst_awarded'), by(fieldtest_time)
drop `worst_awarded'

gen rank_norm = rank_cutoff - rank

//Check
count if rank_norm >= 0 & award == 0
count if rank_norm < 0 & award == 1

/*******************************************************************************
Outcomes
********************************************************************************/

/**********************
Real Earnings
***********************/
//Consumer Price Index
local ipc_2007_2007 = 1
local ipc_2007_2008 = 1.076828139
local ipc_2007_2009 = 1.098426412
local ipc_2007_2010 = 1.133137920
local ipc_2007_2011 = 1.175408824
local ipc_2007_2012 = 1.204103672
local ipc_2007_2013 = 1.227398951
local ipc_2007_2014 = 1.272292502
local ipc_2007_2015 = 1.358377044
local ipc_2007_2016 = 1.436439371
//Minimum Wage
local minwage_2007 = 433700
local minwage_2008 = 461500
local minwage_2009 = 496900
local minwage_2010 = 515000
local minwage_2011 = 535600
local minwage_2012 = 566700
local minwage_2013 = 589500
local minwage_2014 = 616000
local minwage_2015 = 644350
local minwage_2016 = 689455

// For every year build two earnings series, OLE first and PILA as fallback (2009+):
//   earnings_YYYY   : positive earnings observed in or after the graduation year
//   earningsMW_YYYY : the same, restricted to earnings above 1.005 x minimum wage
// Both are then deflated to 2007 prices.
forvalues year = 2007/2016 {
	// earnings observed after graduation, and graduation not before the exam year
	local timing `year' >= grad_year & grad_year >= exam_year & grad_year != .
	// positive OLE earnings
	local ole_positive ole_earnings_`year' > 0
	// OLE earnings above the minimum wage
	local ole_aboveMW ole_earnings_`year' > 1.005*`minwage_`year''

	cap drop earningsMW_`year'
	gen double earningsMW_`year' = ole_earnings_`year' if `timing' & `ole_positive' & `ole_aboveMW'
	cap drop earnings_`year'
	gen double earnings_`year' = ole_earnings_`year' if `timing' & `ole_positive'

	if `year' >= 2009 {
		// positive PILA earnings
		local pila_positive pila_earnings_`year' > 0
		// worker types whose PILA earnings are used
		local pila_formal inlist(pila_worker_type_`year', 1, 18, 22, 30, 32, 47)
		// PILA earnings above the minimum wage
		local pila_aboveMW pila_earnings_`year' > 1.005*`minwage_`year''

		replace earningsMW_`year' = pila_earnings_`year' if `timing' & `pila_positive' & `pila_formal' & `pila_aboveMW' & earningsMW_`year' == .
		replace earnings_`year' = pila_earnings_`year' if `timing' & `pila_positive' & `pila_formal' & earnings_`year' == .
	}

	//Real Earnings (base: 2007)
	replace earnings_`year' = earnings_`year'/`ipc_2007_`year''
	replace earningsMW_`year' = earningsMW_`year'/`ipc_2007_`year''
}

order earnings_???? earningsMW_????, last

//Earnings by Age
// earnings_AA / earningsMW_AA: real earnings in the calendar year in which the
// person turns AA (calendar year minus birth year), for ages 23-30, years 2008-2016.
forvalues age = 23/30 {
	foreach series in earnings earningsMW {
		cap drop `series'_`age'
		gen double `series'_`age' = .
	}
	forvalues year = 2008/2016 {
		foreach series in earnings earningsMW {
			replace `series'_`age' = `series'_`year' if `age' == `year' - year(birthdate)
		}
	}
}

// Log of mean earnings over ages 23-26 and 23-28, first for all earnings and
// then for the above-minimum-wage series (No Minimum Wage).
foreach series in earnings earningsMW {
	foreach last_age in 26 28 {
		local age_vars
		forvalues age = 23/`last_age' {
			local age_vars `age_vars' `series'_`age'
		}
		tempvar window_mean
		egen double `window_mean' = rowmean(`age_vars')
		cap drop ln_`series'_23_`last_age'
		gen ln_`series'_23_`last_age' = ln(`window_mean')
		drop `window_mean'
	}
}

// The age-specific levels are only intermediates.
cap drop earnings_?? earningsMW_?? 

//Earnings After Graduation
// For t = 1..6, earnings_t is the real earnings in calendar year grad_year + t for
// first-semester graduates and grad_year + t + 1 for second-semester graduates;
// year_earn_t records that calendar year when earnings are observed.
// The same is done for the above-minimum-wage series (earningsMW_t, year_earnMW_t).
forvalues t = 1/6 {
	foreach series in earnings earningsMW {
		// year_earn_ / year_earnMW_ : name stem of the matching calendar-year variable
		local year_stem = subinstr("`series'", "earnings", "earn", 1)

		cap drop `series'_`t'
		gen double `series'_`t' = .
		cap drop year_`year_stem'_`t'
		gen year_`year_stem'_`t' = .

		forvalues year = 2008/2016 {
			// (grad_sem == 2) adds the extra year for second-semester graduates
			replace `series'_`t' = `series'_`year' if grad_year + `t' + (grad_sem == 2) == `year' & inlist(grad_sem, 1, 2) & `series'_`t' == .
			replace year_`year_stem'_`t' = `year' if `series'_`t' != . & year_`year_stem'_`t' == .
		}

		cap drop ln_`series'_`t'
		gen double ln_`series'_`t' = ln(`series'_`t')
	}
}
order year_earn_? year_earnMW_? earnings_? earningsMW_? ln_earnings_? ln_earningsMW_?, last

//First Observed Earnings
foreach series in earnings earningsMW {
	cap drop ln_`series'
	egen double ln_`series' = rowfirst(ln_`series'_?)
}

//Year when earnings are observed
cap drop year_earnings
egen double year_earnings = rowfirst(year_earn_?)
cap drop year_earningsMW
egen double year_earningsMW = rowfirst(year_earnMW_?)

//Month when earnings are observed
// Taken from the PILA month variable of the year in which earnings are first observed.
cap drop month_earnings
gen month_earnings = .
cap drop month_earningsMW
gen month_earningsMW = .
foreach year of numlist 2009(1)2016{
	replace month_earnings = pila_earnings_month_`year' if year_earnings == `year'
	replace month_earningsMW = pila_earnings_month_`year' if year_earningsMW == `year' 
}
replace month_earnings = 9 if month_earnings == 10 //Only 2012 includes november
replace month_earningsMW = 9 if month_earningsMW == 10

//Imputation
// Earnings observed but no month available: assume month 9.
replace month_earnings = 9 if ln_earnings != . & month_earnings == .
replace month_earningsMW = 9 if ln_earningsMW != . & month_earningsMW == .

//Date when first observed in labor market
// Day 30 of the observed month, built directly with mdy() from the numeric month
// and year. February has no day 30, so day 28 is used for month 2; the previous
// string-based date("30/m/yyyy") returned missing in that case. Building the date
// numerically also avoids creating scratch string variables named month/year.
cap drop labormkt_date
gen labormkt_date = mdy(month_earnings, cond(month_earnings == 2, 28, 30), year_earnings)
cap drop labormkt_dateMW
gen labormkt_dateMW = mdy(month_earningsMW, cond(month_earningsMW == 2, 28, 30), year_earningsMW)
format labormkt_date labormkt_dateMW %td

//Months From Exam to Earnings
// Elapsed time in 30-day months; non-positive values are set to 1.
cap drop months_exam_to_earnings
gen months_exam_to_earnings = round((labormkt_date - exam_date)/30, 1)
replace months_exam_to_earnings = 1 if months_exam_to_earnings <= 0 
cap drop months_exam_to_earningsMW
gen months_exam_to_earningsMW = round((labormkt_dateMW - exam_date)/30, 1)
replace months_exam_to_earningsMW = 1 if months_exam_to_earningsMW <= 0 

//Months From Grad to Earnings
cap drop months_grad_to_earnings
gen months_grad_to_earnings = round((labormkt_date - grad_date)/30, 1)
replace months_grad_to_earnings = 1 if months_grad_to_earnings <= 0 
cap drop months_grad_to_earningsMW
gen months_grad_to_earningsMW = round((labormkt_dateMW - grad_date)/30, 1)
replace months_grad_to_earningsMW = 1 if months_grad_to_earningsMW <= 0 

/****************************
Firm (Employer) Wage Premium
*****************************/
//Wage Premium Measures
// Build a firm-level lookup with the three ranking measures and remember each
// measure's mean and sd (computed over firms) in local macros.
// NOTE(review): rank_earnings is renamed to CIIURank and rank_ciiu to AKMCIIURank --
// confirm this mapping is the intended one.
preserve
	use "Data/Finals/AKMRanking.dta", clear
	merge 1:1 firm_id using "Data/Finals/FirmIndustryRanking.dta"

	keep firm_id firm_rank rank_ciiu rank_earnings
	rename (firm_rank rank_earnings rank_ciiu) (AKMRank CIIURank AKMCIIURank)

	foreach measure in AKMRank CIIURank AKMCIIURank {
		summarize `measure'
		local `measure'_mean = r(mean)
		local `measure'_sd = r(sd)
	}

	tempfile FirmsWageRanking
	save `FirmsWageRanking', replace
restore

//Statistics of Both Measures of Productivity
// Stored as constants on every row of the main data.
cap drop AKMRank_mean AKMRank_sd CIIURank_mean CIIURank_sd AKMCIIURank_mean AKMCIIURank_sd
foreach measure in AKMRank CIIURank AKMCIIURank {
	gen double `measure'_mean = ``measure'_mean'
	gen double `measure'_sd = ``measure'_sd'
}

//Measures Across Time
// For each year, attach the rankings of the PILA employer. The firm id is reduced
// to its digits and blanked unless the firm type is NI/NIT and earnings for the
// year are observed.
cap drop firm_id_*
forvalues year = 2009/2016 {

	egen firm_id = sieve(pila_firm_id_`year'), keep(n)
	replace firm_id = "" if !inlist(pila_firm_type_`year', "NI", "NIT")
	replace firm_id = "" if earnings_`year' == .

	// keep(master matched) already discards using-only firms, so no further
	// filtering on _merge is needed before the helper variables are dropped.
	merge m:1 firm_id using `FirmsWageRanking', keep(master matched)
	drop _merge firm_id

	foreach measure in AKMRank CIIURank AKMCIIURank {
		cap drop `measure'_`year'
		rename `measure' `measure'_`year'
	}
}

//Wage Premium between 23 and 26
// Employer rankings in the calendar year in which the person turns each age
// (ages 23-30 are built; only 23-26 are summarized below).
forvalues age = 23/30 {
	foreach measure in AKMRank CIIURank AKMCIIURank {
		cap drop `measure'_`age'
		gen double `measure'_`age' = .
		forvalues year = 2009/2016 {
			replace `measure'_`age' = `measure'_`year' if `age' == `year' - year(birthdate)
		}
	}
}

//Average Ranking Between 23 and 26 (avg), then
//Ranking First Observed Between 23 and 26 (fst)
foreach stat in avg fst {
	local rowfn = cond("`stat'" == "avg", "rowmean", "rowfirst")
	foreach measure in AKMRank CIIURank AKMCIIURank {
		cap drop `measure'_`stat'2326
		egen double `measure'_`stat'2326 = `rowfn'(`measure'_23 `measure'_24 `measure'_25 `measure'_26)
	}
}

//Wage Premium After Graduation
// For t = 1..6, each ranking is taken from calendar year grad_year + t for
// first-semester graduates and grad_year + t + 1 for second-semester graduates.
// year_Rank_t records the calendar year in which AKMRank_t is observed.
foreach t of numlist 1(1)6 {
	cap drop AKMRank_`t'
	gen double AKMRank_`t' = .
	cap drop CIIURank_`t'
	gen double CIIURank_`t' = .
	// The earlier "cap drop year_Rank_`t' = " had a stray "=", which made the drop
	// fail silently under capture, so the gen below broke whenever this section
	// was re-run on data that already contained the variable.
	cap drop year_Rank_`t'
	gen year_Rank_`t' = .
	cap drop AKMCIIURank_`t'
	gen double AKMCIIURank_`t' = .

	foreach year of numlist 2009(1)2016 {
		foreach measure in AKMRank CIIURank AKMCIIURank {
			replace `measure'_`t' = `measure'_`year' if grad_year + `t' == `year' & grad_sem == 1 & `measure'_`t' == .
			replace `measure'_`t' = `measure'_`year' if grad_year + `t' + 1 == `year' & grad_sem == 2 & `measure'_`t' == .
		}
		replace year_Rank_`t' = `year' if AKMRank_`t' != . & year_Rank_`t' == .
	}
}

// Average and first-observed ranking over the six post-graduation years.
// The single-character wildcard only matches the _1.._6 variables.
// cap drop added so this section can be re-run, as elsewhere in the file.
foreach measure in AKMRank CIIURank AKMCIIURank {
	cap drop `measure'_avg
	egen `measure'_avg = rowmean(`measure'_?)
	cap drop `measure'_fst
	egen `measure'_fst = rowfirst(`measure'_?)
}

/****************************************
Firm (Employer) Wage Premium Across-Time
*****************************************/
//Up to 3 Years After First Observed Employer
// year_Rank is the first calendar year with an observed ranking; *_year1.._year3
// hold the rankings in that year and the following two calendar years.
egen year_Rank = rowfirst(year_Rank_?)
foreach t of numlist 0(1)2 {
	local T = `t'+1
	foreach measure in AKMRank CIIURank AKMCIIURank {
		cap drop `measure'_year`T'
		gen double `measure'_year`T' = .
		forvalues year = 2009/2016 {
			replace `measure'_year`T' = `measure'_`year' if `year' == year_Rank + `t' & year_Rank != .
		}
	}
}

//Observed Employer Overtime
// Re-index each person's observed rankings as tau = 1, 2, ... (skipping periods
// with no AKMRank), producing *_tau1, *_tau2, ... in wide form.
cap drop AKMRank_tau* CIIURank_tau* AKMCIIURank_tau* 
preserve
	keep consecutivo AKMRank_? CIIURank_? year_Rank_? AKMCIIURank_?
	reshape long AKMRank_ CIIURank_ year_Rank_ AKMCIIURank_, i(consecutivo) j(time)
	drop if AKMRank_ == .
	drop time
	sort consecutivo
	by consecutivo : egen tau = seq() 
	rename (AKMRank_ CIIURank_ year_Rank_ AKMCIIURank_) (AKMRank_tau CIIURank_tau YearRank_tau AKMCIIURank_tau)
	reshape wide AKMRank_tau CIIURank_tau YearRank_tau AKMCIIURank_tau, i(consecutivo) j(tau)
	tempfile JobLadder
	save `JobLadder', replace
restore

// Attach the re-indexed rankings back to the main data.
merge 1:1 consecutivo using `JobLadder'
drop _merge

/*******************************
Specific Skills-Industry Match
********************************/
//Industry Across Time
// For each year: take the industry code (OLE first, PILA as fallback), keep it
// only when earnings are observed and the code exists in the industry-code list,
// then attach the field-industry match indicators.
cap drop ciiu_*
forvalues year = 2009/2016 {
	gen ciiu = ole_ciiu_`year'
	cap replace ciiu = pila_ciiu_`year' if ciiu == .
	replace ciiu = . if earnings_`year' == . 

	// Validate the code against the list; unmatched codes are blanked.
	merge m:1 ciiu using "Data/Intermediates/IndystryCodes_CIIU3_31.dta", keepusing(ciiu)
	keep if inlist(_merge, 1, 3)
	replace ciiu = . if _merge == 1
	drop _merge

	merge m:1 ciiu field_code using "Data/Finals/Field-Indystry Match.dta", keep(master matched) keepusing(fieldciiu_match?) nogenerate

	// Unmatched (industry, field) pairs count as "no match" (0) when both the
	// industry and the field are known.
	forvalues k = 1/2 {
		cap drop CIIUMatch`k'_`year'
		rename fieldciiu_match`k' CIIUMatch`k'_`year'
		replace CIIUMatch`k'_`year' = 0 if CIIUMatch`k'_`year' == . & ciiu != . & field_code != .
	}

	rename ciiu ciiu_`year'
}

//Industry across Age
//CIIUMatch#_AA takes the yearly indicator of the calendar year in which the
//person's age (calendar year minus birth year) equals AA.
forvalues a = 23/30 {
	capture drop CIIUMatch*_`a'
	forvalues k = 1/2 {
		generate CIIUMatch`k'_`a' = .
	}

	forvalues yr = 2009/2016 {
		forvalues k = 1/2 {
			replace CIIUMatch`k'_`a' = CIIUMatch`k'_`yr' if `a' == `yr' - year(birthdate) & CIIUMatch`k'_`a' == .
		}
	}
}

//Industry Match by Age 26
//First non-missing indicator, searching ages 26, 25, 24, 23 in that order
forvalues k = 1/2 {
	capture drop CIIUMatch`k'_fst2623
	egen CIIUMatch`k'_fst2623 = rowfirst(CIIUMatch`k'_26 CIIUMatch`k'_25 CIIUMatch`k'_24 CIIUMatch`k'_23)
}

//Industry Match After Graduation
//CIIUMatch#_t is the yearly indicator t years after graduation; second-semester
//graduates (grad_sem == 2) are shifted one extra calendar year.
forvalues t = 1/6 {
	forvalues k = 1/2 {
		capture drop CIIUMatch`k'_`t'
		generate double CIIUMatch`k'_`t' = .
	}

	forvalues yr = 2009/2016 {
		forvalues k = 1/2 {
			forvalues sem = 1/2 {
				//lag = t for semester 1, t + 1 for semester 2
				local lag = `t' + `sem' - 1
				replace CIIUMatch`k'_`t' = CIIUMatch`k'_`yr' if grad_year + `lag' == `yr' & grad_sem == `sem' & CIIUMatch`k'_`t' == .
			}
		}
	}
}

//Majority indicator: 1 when the mean over the post-graduation years is at least 0.5
//(the ? wildcard matches only the one-character suffixes 1-6 created above)
foreach k in 2 1 {
	capture drop CIIUMatch`k'_avg
	egen CIIUMatch`k'_avg = rowmean(CIIUMatch`k'_?)
	replace CIIUMatch`k'_avg = 0 if CIIUMatch`k'_avg != . & CIIUMatch`k'_avg <  0.5
	replace CIIUMatch`k'_avg = 1 if CIIUMatch`k'_avg != . & CIIUMatch`k'_avg >= 0.5
}

//First non-missing post-graduation indicator
forvalues k = 1/2 {
	capture drop CIIUMatch`k'_fst
	egen CIIUMatch`k'_fst = rowfirst(CIIUMatch`k'_?)
}

//Ever matched in any post-graduation year
forvalues k = 1/2 {
	capture drop CIIUMatch`k'_ever
	egen CIIUMatch`k'_ever = rowmax(CIIUMatch`k'_?)
}

/****************************
Skills Transferability
*****************************/
//Keep the original field identifiers under the names fieldname/fieldcode and
//rebuild field_code/field_name with gaps filled in.
rename (field_name field_code) (fieldname fieldcode)

//Missing field codes take the most frequent code among students of the same exam
bysort exam_id : egen aux = mode(fieldcode) if exam_id != .
generate field_code = cond(fieldcode == ., aux, fieldcode)
drop aux

//One name per (filled) field code: the most frequent original name
bysort field_code : egen field_name = mode(fieldname)
order fieldcode field_name field_code, after(fieldname)

//Attach the field-level transferability data; unmatched using rows are discarded
capture drop num_industries
merge m:1 field_code using "Data/Finals/SkillsTransferability.dta", keep(master match) nogenerate

/****************************
Other Outcomes
*****************************/
//Months to Graduation (days between exam and graduation divided by 30, one decimal)
generate months_exam_to_grad = round((grad_date - exam_date)/30, 0.1)

//Number of Subjects Taken and Approved by Graduation
//SPADIES counts are kept only inside (10, 128] for classes taken and (5, 128] for
//classes passed; everything else becomes missing.
summarize spa_classtaken, detail
replace spa_classtaken = . if spa_classtaken <= 10 | spa_classtaken > 128
replace spa_classpassed = . if spa_classpassed <= 5 | spa_classpassed > 128

//Saber Pro counts are blanked when the SPADIES counterpart is missing or the count exceeds 48
summarize spro_classtaken, detail
replace spro_classtaken = . if spa_classtaken == . | spro_classtaken > 48
replace spro_classpassed = . if spa_classpassed == . | spro_classpassed > 48

//Outcomes are defined only for students whose SPADIES graduation period is not before the exam
local grad_after_exam "spa_gradtime >= exam_time & spa_gradtime !=."
generate tot_classtaken = spa_classtaken if `grad_after_exam'
generate classtaken_aftexam = spro_classtaken if `grad_after_exam'
generate classpass_aftexam = spro_classpassed if `grad_after_exam'

//Keep the three outcomes on a common sample: if any of them is missing, blank them all
capture drop aux
generate byte aux = missing(tot_classtaken, classtaken_aftexam, classpass_aftexam)
foreach v of varlist tot_classtaken *_aftexam {
	replace `v' = . if aux
}
drop aux

//Graduate Education (Within 5 years of the Exam)
//The year is taken from the first four digits of fst_g_gradsch
tostring fst_g_gradsch, generate(gradsch_year)
replace gradsch_year = substr(gradsch_year, 1, 4)
destring gradsch_year, replace

//1 when graduate school starts 1 to 5 years after the exam, 0 otherwise (including never)
generate gradsch = (gradsch_year - exam_year > 0) & (gradsch_year - exam_year <= 5)

/*******************************************************************************
Labels
********************************************************************************/

//Background characteristics
label variable stratum       "Socioeconomic Stratum"
label variable meduc_cllg    "Mother's Educ: College"
label variable feduc_cllg    "Father's Educ: College"
label variable female        "Female"
label variable main_city     "Lives in Main City"
label variable working       "Working at Test Date"
label variable age           "Age at Test Date"

//College characteristics
label variable cllg_prv      "Private College"
label variable top5          "College Ranking: 1-5"
label variable top20         "College Ranking: 6-20"
label variable above20       "College Ranking: 21+"

//Test scores ({&sigma} is rendered as the Greek letter sigma in graphs)
label variable reading       "Reading Score ({&sigma})"
label variable english       "English Score ({&sigma})"
label variable sb11score     "High School Score ({&sigma})"

//Other covariates
label variable num_semesters "Semesters in College"
label variable low_SES       "Low-Socioeconomic Status"
label variable high_SES      "High-Socioeconomic Status"

//Short English display name (exam_field) for each exam (exam_name).
//Exams that match none of the rules below keep exam_field == "".
//The rules run in order, so if two rules match the same exam_name the later one wins;
//regexm() rules match any exam_name that contains the given text.
//NOTE(review): some display strings look misspelled ("Enviromental Eng.",
//"Pedagody Diploma"; "Nursery" for ENFERMERIA presumably meant "Nursing"). They are
//left unchanged here because other code or outputs may match on them -- confirm before editing.
gen exam_field = ""
replace exam_field = "Business" if exam_name == "ADMINISTRACION"
replace exam_field = "Architecture" if exam_name == "ARQUITECTURA"
replace exam_field = "Clinical Lab" if exam_name == "BACTERIOLOGIA"
replace exam_field = "Biology" if exam_name == "BIOLOGIA"
replace exam_field = "Communications" if exam_name == "COMUNICACION E INFORMACION"
replace exam_field = "Accounting" if exam_name == "CONTADURIA"
replace exam_field = "Law" if exam_name == "DERECHO"
replace exam_field = "Economics" if exam_name == "ECONOMIA"
replace exam_field = "Physical Education" if regexm(exam_name, "DEPORTES")
replace exam_field = "Nursery" if exam_name == "ENFERMERIA"
replace exam_field = "Physics" if exam_name == "FISICA"
replace exam_field = "Physiotherapy" if exam_name == "FISIOTERAPIA"
replace exam_field = "Speech Therapy" if exam_name == "FONOAUDIOLOGIA"
replace exam_field = "Geology" if exam_name == "GEOLOGIA"
//Engineering exams
replace exam_field = "Agricultural Eng." if exam_name == "INGENIERIA AGRICOLA"
replace exam_field = "Agroindustrial Eng." if exam_name == "INGENIERIA AGROINDUSTRIAL"
replace exam_field = "Agronomic Eng." if exam_name == "INGENIERIA AGRONOMICA Y AGRONOMIA"
replace exam_field = "Enviromental Eng." if exam_name == "INGENIERIA AMBIENTAL"
replace exam_field = "Civil Eng." if exam_name == "INGENIERIA CIVIL"
replace exam_field = "Food Eng." if exam_name == "INGENIERIA DE ALIMENTOS"
replace exam_field = "Petroleum Eng." if exam_name == "INGENIERIA DE PETROLEOS"
replace exam_field = "Systems Eng." if exam_name == "INGENIERIA DE SISTEMAS"
replace exam_field = "Electric Eng." if exam_name == "INGENIERIA ELECTRICA"
replace exam_field = "Electronic Eng." if exam_name == "INGENIERIA ELECTRONICA"
replace exam_field = "Forest Eng." if exam_name == "INGENIERIA FORESTAL"
replace exam_field = "Industrial Eng." if exam_name == "INGENIERIA INDUSTRIAL"
replace exam_field = "Mechanical Eng." if exam_name == "INGENIERIA MECANICA"
replace exam_field = "Chemical Eng." if exam_name == "INGENIERIA QUIMICA"
replace exam_field = "Surgical Instrument" if exam_name == "INSTRUMENTACION QUIRURGICA"
//Teaching degrees (LICENCIATURA ...)
replace exam_field = "Spanish Educ." if exam_name == "LICENCIATURA EN EDUCACION BASICA ENFASIS EN HUMANIDADES Y LENGUA CASTELLANA"
replace exam_field = "Preschool Educ." if exam_name == "LICENCIATURA EN PEDAGOGIA INFANTIL PREESCOLAR O ESTIMULACION TEMPRANA"
replace exam_field = "Natural Sc. Educ." if regexm(exam_name, "ENFASIS EN CIENCIAS NATURALES")
replace exam_field = "Social Sc. Educ." if regexm(exam_name, "ENFASIS EN CIENCIAS SOCIALES")
replace exam_field = "Mathematics Educ." if regexm(exam_name, "ENFASIS EN MATEMATICAS")
replace exam_field = "French" if exam_name == "LICENCIATURA EN LENGUAS MODERNAS FRANCES"
replace exam_field = "English" if exam_name == "LICENCIATURA EN LENGUAS MODERNAS INGLES"
replace exam_field = "Mathematics" if exam_name == "MATEMATICA"
replace exam_field = "Medicine" if exam_name == "MEDICINA"
//The three veterinary/zootechnics exams are matched on their exact names
replace exam_field = "Veterinary" if exam_name == "MEDICINA VETERINARIA"
replace exam_field = "Zootechnics" if exam_name == "ZOOTECNIA"
replace exam_field = "Veterinary/Zootech." if exam_name == "MEDICINA VETERINARIA Y ZOOTECNIA"
replace exam_field = "Nutrition" if exam_name == "NUTRICION Y DIETETICA"
replace exam_field = "Dentistry" if exam_name == "ODONTOLOGIA"
replace exam_field = "Optometry" if exam_name == "OPTOMETRIA"
replace exam_field = "Psychology" if exam_name == "PSICOLOGIA"
replace exam_field = "Chemistry" if exam_name == "QUIMICA"
replace exam_field = "Occup. Therapy" if exam_name == "TERAPIA OCUPACIONAL"
replace exam_field = "Social Work" if exam_name == "TRABAJO SOCIAL"
//Teacher-training (NORMALISTAS), technical (TECNICO) and technological (TECNOLOGIA/TECNOLOGICO) exams
replace exam_field = "Pedagody Diploma" if exam_name == "NORMALISTAS SUPERIORES"
replace exam_field = "Electronic 2Y" if exam_name == "TECNICO EN ELECTRONICA Y AFINES"
replace exam_field = "Systems 2Y" if exam_name == "TECNICO EN SISTEMAS Y AFINES"
replace exam_field = "Business 2Y" if regexm(exam_name, "TECNICO PROFESIONAL EN ADMINISTRACION")
replace exam_field = "Business 3Y" if exam_name == "TECNOLOGIA EN ADMINISTRACION Y AFINES"
replace exam_field = "Electronic 3Y" if exam_name == "TECNOLOGICO EN ELECTRONICA Y AFINES"
replace exam_field = "Systems 3Y" if exam_name == "TECNOLOGICO EN SISTEMAS Y AFINES"
//Place the display name right after the original exam name
order exam_field, after(exam_name)

//Short English display name (fieldname) for each field of study (field_name).
//Any existing fieldname variable is discarded and rebuilt from field_name.
//All rules are exact matches on field_name; unmatched values keep fieldname == "".
//NOTE(review): "Nursery" for ENFERMERIA presumably meant "Nursing"; left unchanged
//because other code or outputs may match on the string -- confirm before editing.
cap drop fieldname
gen fieldname = ""
replace fieldname = "Accounting" if field_name == "CONTADURIA PUBLICA"
replace fieldname = "Business" if field_name == "ADMINISTRACION"
replace fieldname = "Education" if field_name == "EDUCACION"
replace fieldname = "Agronomy" if field_name == "AGRONOMIA"
replace fieldname = "Political Sc." if field_name == "CIENCIA POLITICA Y RELACIONES INTERNACIONALES"
replace fieldname = "Biology" if field_name == "BIOLOGIA, MICROBIOLOGIA Y AFINES"
replace fieldname = "Electronic Eng." if field_name == "INGENIERIA ELECTRONICA, TELECOMUNICACIONES Y AFINES"
replace fieldname = "Industrial Eng." if field_name == "INGENIERIA INDUSTRIAL Y AFINES"
replace fieldname = "Psychology" if field_name == "PSICOLOGIA"
replace fieldname = "Agroindustrial Eng." if field_name == "INGENIERIA AGROINDUSTRIAL, ALIMENTOS Y AFINES"
replace fieldname = "System Eng." if field_name == "INGENIERIA DE SISTEMAS, TELEMATICA Y AFINES"
replace fieldname = "Environmental Eng." if field_name == "INGENIERIA AMBIENTAL, SANITARIA Y AFINES"
replace fieldname = "Advertisement" if field_name == "PUBLICIDAD Y AFINES"
replace fieldname = "Dentistry" if field_name == "ODONTOLOGIA"
replace fieldname = "Electric Eng." if field_name == "INGENIERIA ELECTRICA Y AFINES"
replace fieldname = "Math and Statistics" if field_name == "MATEMATICAS, ESTADISTICA Y AFINES"
replace fieldname = "Chemistry" if field_name == "QUIMICA Y AFINES"
replace fieldname = "Economics" if field_name == "ECONOMIA"
replace fieldname = "Journalism/Communications" if field_name == "COMUNICACION SOCIAL, PERIODISMO Y AFINES"
replace fieldname = "Law" if field_name == "DERECHO Y AFINES"
replace fieldname = "Zootechnics" if field_name == "ZOOTECNIA"
replace fieldname = "Biomedical Eng." if field_name == "INGENIERIA BIOMEDICA Y AFINES"
replace fieldname = "Therapy" if field_name == "TERAPIAS"
replace fieldname = "Livestock/Agronomic Eng." if field_name == "INGENIERIA AGRONOMICA, PECUARIA Y AFINES"
replace fieldname = "Civil Eng." if field_name == "INGENIERIA CIVIL Y AFINES"
replace fieldname = "Medicine" if field_name == "MEDICINA"
replace fieldname = "Sociology/Social Work" if field_name == "SOCIOLOGIA, TRABAJO SOCIAL Y AFINES"
replace fieldname = "Mechanical Eng." if field_name == "INGENIERIA MECANICA Y AFINES"
replace fieldname = "Veterinary" if field_name == "MEDICINA VETERINARIA"
replace fieldname = "Business Eng" if field_name == "INGENIERIA ADMINISTRATIVA Y AFINES"
replace fieldname = "Engineering (others)" if field_name == "OTRAS INGENIERIAS"
replace fieldname = "Clinical Lab" if field_name == "BACTERIOLOGIA"
replace fieldname = "Nutrition and Diet" if field_name == "NUTRICION Y DIETETICA"
replace fieldname = "Forest/Agricult. Eng." if field_name == "INGENIERIA AGRICOLA, FORESTAL Y AFINES"
replace fieldname = "Nursery" if field_name == "ENFERMERIA"
replace fieldname = "Physics" if field_name == "FISICA"
replace fieldname = "Surgical Instrument" if field_name == "INSTRUMENTACION QUIRURGICA"
replace fieldname = "Metallurgical/Mining Eng." if field_name == "INGENIERIA DE MINAS, METALURGIA Y AFINES"
replace fieldname = "Chemical Eng." if field_name == "INGENIERIA QUIMICA Y AFINES"
replace fieldname = "Geology" if field_name == "GEOLOGIA, OTROS PROGRAMAS DE CIENCIAS NATURALES"
replace fieldname = "Optometry" if field_name == "OPTOMETRIA Y OTROS PROGRAMAS DE CIENCIAS DE LA SALUD"
replace fieldname = "History and Geography" if field_name == "GEOGRAFIA, HISTORIA"
replace fieldname = "Physical Education" if field_name == "DEPORTES, EDUCACION FISICA Y RECREACION"
replace fieldname = "Modern Languages" if field_name == "LENGUAS MODERNAS, LITERATURA, LINGUISTICA Y AFINES"
replace fieldname = "Anthropology" if field_name == "ANTROPOLOGIA, ARTES LIBERALES"
replace fieldname = "Architecture" if field_name == "ARQUITECTURA"
replace fieldname = "Arts: Plastic" if field_name == "ARTES PLASTICAS, VISUALES Y AFINES"
replace fieldname = "Arts: Representative" if field_name == "ARTES REPRESENTATIVAS"
replace fieldname = "Library Sc." if field_name == "BIBLIOTECOLOGIA Y OTROS DE CIENCIAS SOCIALES Y HUMANAS"
replace fieldname = "Philosophy" if field_name == "FILOSOFIA, TEOLOGIA Y AFINES"
//NOTE(review): this literal contains a non-ASCII character; the match depends on the
//do-file and the data sharing the same encoding -- confirm.
replace fieldname = "Design" if field_name == "DISEÑO"
replace fieldname = "Military/Police Ed" if field_name == "FORMACION RELACIONADA CON EL CAMPO MILITAR O POLICIAL"
replace fieldname = "Music" if field_name == "MUSICA"
replace fieldname = "Arts: Other" if field_name == "OTROS PROGRAMAS ASOCIADOS A BELLAS ARTES"
replace fieldname = "Public Health" if field_name == "SALUD PUBLICA"
//Unclassified programs get an empty display name (fieldname is already "" at this point)
replace fieldname = "" if field_name == "SIN CLASIFICAR"
//Place the display name right after the original field name
order fieldname, after(field_name)

/*******************************************************************************
Sample Restrictions 
********************************************************************************/
//sample == 1 marks the estimation sample. Every exclusion rule below gives the
//observations it removes their own code (2-10); a rule only touches observations
//that are still coded 1, so the first rule that applies decides the code.

capture drop sample
generate sample = 1

//2: Students taking General Skills Exam in 2009 (55,873)
replace sample = 2 if sample == 1 & exam_year <= 2009 & merge_specific_test == 0

//3: No running variable (139)
replace sample = 3 if sample == 1 & exam_year <= 2009 & (exam_id == . | spro_overallscore == .)

//4-5: Students in 2-year programs (59,063)
replace sample = 4 if sample == 1 & (regexm(exam_name, "TECNICO") | regexm(exam_name, "TECNOLOGI") | regexm(exam_name, "NORMAL"))
replace sample = 5 if sample == 1 & inlist(program_level, 1, 2)

//6: Students with previous college diploma (20,941)
//The 2010 exam periods 20102/20103 are recoded to 20101/20102 before being compared with graduation periods
generate examtime = cond(exam_time == 20102, 20101, cond(exam_time == 20103, 20102, exam_time))
replace sample = 6 if sample == 1 & grad_time != . & grad_time < examtime

//7: Students with graduate diploma (675): graduate school before graduation or before the exam
replace sample = 7 if sample == 1 & fst_g_gradsch != . & ((grad_time != . & fst_g_gradsch < grad_time) | fst_g_gradsch < examtime)

//8: Covariate restriction (6441)
//NOTE(review): meduc_cllg2Y and meduc_cllg4Y are listed twice in X; harmless for the loop below
global X female age stratum meduc_hs meduc_cllg2Y meduc_cllg4Y meduc_cllg2Y meduc_cllg4Y d_semester2 d_semester3 d_semester4 reading english sb11score
foreach covariate of global X {
	replace sample = 8 if sample == 1 & `covariate' == .
}

//9-10: Fields with issues in running variable
//Fields we drop some cohorts (9: 7,022)
//Fields we drop all cohorts (10: 17,460)
count if score_sd == .
replace sample = 9 if sample == 1 & exam_year <= 2009 & score_sd == .
tabulate exam_field exam_year if score_sd == .
tabulate exam_field exam_year
replace sample = 10 if sample == 9 & inlist(exam_id, 33, 317, 9)

drop examtime

//We remain with 93.4% (= 2794/2992) of all awardees in 4/5-year programs
replace award = merge_specific_award if award == .
tabulate award if sample == 1 | sample > 4
tabulate award if sample == 1

save "Data/Finals/Estimation Sample.dta", replace

/*******************************************************************************
Create Some Variables
********************************************************************************/

use "Data/Finals/Estimation Sample.dta", clear

//Only the estimation sample (sample == 1) of the exams up to 2009 is used here
keep if exam_year <= 2009 & sample == 1

//Skill Transferability: 1 when the field's number of industries is above the sample median
capture drop transferable
summarize num_industries, detail
generate transferable = num_industries > r(p50) if num_industries != .

//College Reputation: re-standardized to mean 0, sd 1 within this sample
summarize cllg_reputation_sd, detail
tempname rep_mean rep_sd
scalar `rep_mean' = r(mean)
scalar `rep_sd' = r(sd)
replace cllg_reputation_sd = (cllg_reputation_sd - `rep_mean')/`rep_sd'

//Area x Year Fixed Effects
egen year_area = group(exam_year area)
quietly tabulate year_area, generate(yarea_)

//Field x Year Fixed Effects
quietly tabulate fieldtest_time, generate(yfield_)

//Main Outcome
global Y ln_earningsMW

//Nth observed earnings
//ln_earningsMW_tau# = the #-th non-missing value among ln_earningsMW_1 ... ln_earningsMW_6,
//counted in period order (gaps are skipped).
preserve
keep ln_earningsMW_1 ln_earningsMW_2 ln_earningsMW_3 ln_earningsMW_4 ln_earningsMW_5 ln_earningsMW_6 consecutivo
reshape long ln_earningsMW_, i(consecutivo) j(time)
drop if ln_earningsMW_ == .
//Order within person explicitly by period: bysort on consecutivo alone does not
//guarantee the order of observations inside each person.
bysort consecutivo (time): gen tau = _n
drop time
reshape wide ln_earningsMW_, i(consecutivo) j(tau)
rename ln_earningsMW_* ln_earningsMW_tau*
tempfile nth_earnings
save `nth_earnings'
restore 

//People without any observed earnings have no row in the lookup and keep missing values
merge 1:1 consecutivo using `nth_earnings', nogen

//Earnings N years after first earnings
//ln_earningsMW_tau_plus# = earnings in the #-th period counted from the first
//period with non-missing earnings (later gaps stay missing).
preserve
keep ln_earningsMW_1 ln_earningsMW_2 ln_earningsMW_3 ln_earningsMW_4 ln_earningsMW_5 ln_earningsMW_6 consecutivo
reshape long ln_earningsMW_, i(consecutivo) j(time)
//started turns 1 in the first period with earnings and stays 1 afterwards
//(running count, within person and in period order, of non-missing earnings so far)
bysort consecutivo (time): gen byte started = sum(ln_earningsMW_ != .) > 0
drop if !started
bysort consecutivo (time): gen tau = _n
drop time started
reshape wide ln_earningsMW_, i(consecutivo) j(tau)
rename ln_earningsMW_* ln_earningsMW_tau_plus*
tempfile earnings_since_first
save `earnings_since_first'
restore 

merge 1:1 consecutivo using `earnings_since_first', nogen

//Firm Productivity Measure
//For each ranking: standardized versions (suffix _sd) of the average, first and
//tau-th observed rank, using the ranking's own _mean and _sd variables, plus the
//change in rank between the 1st and the 2nd / 3rd observed employer.
foreach rank in AKMRank CIIURank AKMCIIURank {
	foreach v of varlist `rank'_avg `rank'_fst `rank'_tau* {
		generate double `v'_sd = (`v' - `rank'_mean)/`rank'_sd
	}
	forvalues j = 2/3 {
		generate double `rank'_delta1`j' = `rank'_tau`j' - `rank'_tau1
	}
}

//Analysis-sample flags: 1 when every listed variable is observed, missing otherwise

//Balanced Sample (Dynamic Effects)
capture drop dynmc_sample
generate dynmc_sample = cond(!missing(ln_earningsMW_1, ln_earningsMW_2, ln_earningsMW_3), 1, .)

//Firms Sample
capture drop firms_sample
generate firms_sample = cond(!missing(AKMRank_avg, AKMRank_fst, CIIURank_avg, CIIURank_fst), 1, .)

//Industries Sample
capture drop ciiu_sample
generate ciiu_sample = cond(!missing(CIIUMatch2_avg, CIIUMatch1_avg, CIIUMatch1_fst, CIIUMatch2_fst, CIIUMatch1_ever, CIIUMatch2_ever), 1, .)

//Job Ladder Sample
capture drop jobladder_sample
generate jobladder_sample = cond(!missing(AKMRank_tau1, AKMRank_tau2), 1, .)

//Put the rows of the saved estimation file back next to the rows processed above.
//Where a consecutivo now appears twice, the processed copy is kept and the copy
//coming from disk is dropped.
tempvar processed twin
generate byte `processed' = 1
append using "Data/Finals/Estimation Sample.dta"
duplicates tag consecutivo, generate(`twin')
drop if `twin' == 1 & `processed' == .
//Temporary variables are removed explicitly so they are not written to the saved file
drop `twin' `processed'

save "Data/Finals/Estimation Sample.dta", replace

/******************************************************************************************/

use "Data/Finals/Estimation Sample.dta", clear

keep if exam_year == 2010

//Students Eligible to Get the General Skills Award
//Build a consecutivo -> generic_exam lookup from the two raw 2010 files
//(generic_exam = 1 when presento_prueba_espe is "NO")
preserve
tempfile GeneralExam
local n_read = 0
foreach wave in 20102 20103 {
	local ++n_read
	import delimited "Data/Originals/Saber Pro/Saber Pro `wave'.txt", encoding("utf-8")  stringc(_all) clear
	generate generic_exam = presento_prueba_espe == "NO"
	rename estu_consecutivo consecutivo
	keep consecutivo generic_exam
	//From the second file on, stack the rows saved so far below the current ones
	if `n_read' > 1 {
		append using `GeneralExam'
	}
	save `GeneralExam', replace
}
restore

merge 1:1 consecutivo using `GeneralExam', nogenerate
keep if generic_exam == 1

//Awardees with at least one distinction whose mejor_periodo contains "2010"
preserve
use "Data/Intermediates/Awardees_2006_2010_ID.dta", clear
tempvar any_award
egen `any_award' = rowmax(distinction1 distinction2 distinction3 distinction4)
keep if `any_award' == 1 & regexm(mejor_periodo, "2010")
drop `any_award' punt_0 distinction* mejor_periodo
rename (punt_1 punt_2 punt_3 punt_4) (punt_persunderstand punt_english punt_criticalthink punt_problemsolve)
tempfile GeneralAwards
save `GeneralAwards', replace
restore

//Attach the awardee scores; awardees that are not in the current data are discarded
merge 1:1 consecutivo using `GeneralAwards', keep(master match) nogenerate

keep if sample == 1

//Awardee scores that disagree with the Saber Pro score of the same test are set to missing
foreach test in persunderstand english criticalthink problemsolve {
	replace punt_`test' = . if punt_`test' != . & spro_`test' != . & punt_`test' != spro_`test'
}

//The National Distinction Award was not Granted within these Areas
//(1 = Agronomy, 4 = Health)
drop if inlist(program_area, 1, 4)

//Standardize Test Scores
//Writing is standardized separately for each exam period, using the mean and sd of
//spro_writing among observations with exam_time 20102 / 20103.
//NOTE(review): the statistics are selected with exam_time but applied with
//exam_semester; this presumes 20102 <-> semester 2 and 20103 <-> semester 3 -- confirm.
generate double writing = spro_writing
foreach s in 2 3 {
	summarize spro_writing if exam_time == 2010`s'
	replace writing = (writing - r(mean))/r(sd) if exam_semester == `s'
}

//Drop observations without the scores or the program area needed below
drop if writing == . | reading == . | program_area == .

/****************************
Cutoff and Running Variables
****************************/
//For each test the cutoff is the lowest score observed among awardees (minimum of
//punt_*). The running variable is the distance of the student's score to that
//cutoff, in standard deviations of the score.
foreach test in persunderstand english criticalthink problemsolve {
	tempname cutoff
	summarize punt_`test'
	scalar `cutoff' = r(min)
	summarize spro_`test'
	generate double running_`test' = (spro_`test' - `cutoff')/r(sd)
}

//Area x Year Fixed Effects
egen yeararea = group(exam_year program_area)
tabulate yeararea, generate(yeararea_)

//Area dummies
tabulate program_area, generate(darea_)

tempfile EstimationData2010
save `EstimationData2010', replace

//Swap the 2010 rows of the saved file for the processed 2010 rows and keep only
//sample codes 1, 9 and 10
use "Data/Finals/Estimation Sample.dta", clear
drop if exam_year == 2010
append using `EstimationData2010'

keep if inlist(sample, 1, 9, 10)
save "Data/Finals/Estimation Sample.dta", replace
